# Packages needed for NetCDF handling, spatial analysis, and plotting.
required_packages <- c(
  "raster", "ncdf4", "sf", "ggplot2", "dplyr", "tmap",
  "spdep", "knitr", "forecast", "gridExtra"
)
# Install any package that is missing, then attach each one quietly.
invisible(lapply(required_packages, function(pkg) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  suppressMessages(library(pkg, character.only = TRUE))
}))
# Helper functions for space-time ARIMA: starima_fit/starima_pre, NRMSE, stacf/stpacf.
source("data/starima_package.R")
# Open the first .nc file to get the UK-area grid-cell bounds.
# NOTE(review): assumes every yearly file shares the same 12 km grid, so one
# file suffices for the cell geometry — confirm against the dataset docs.
path <- 'data/tasmax_12km/tasmax_hadukgrid_uk_12km_mon_195301-195312.nc'
# Open the NetCDF connection
nc <- nc_open(path)
# Cell-edge coordinates of the projected grid.
# NOTE(review): presumably 2 x n matrices (row 1 = lower edge, row 2 = upper
# edge), as indexed by create_polygons_from_bounds() below — verify.
x_bnds <- ncvar_get(nc, 'projection_x_coordinate_bnds')
y_bnds <- ncvar_get(nc, 'projection_y_coordinate_bnds')
# Close the NetCDF connection
nc_close(nc)
# Build an sf data frame of one rectangular polygon per grid cell,
# using the function defined below.
# Build one rectangular st_polygon per grid cell from the 2 x n cell-edge
# matrices. Cells are emitted x-major (all y cells for the first x, then the
# next x, ...), which must match the order used later when tasmax values are
# written into the sf object.
#
# x_bnds, y_bnds: matrices whose row 1 / row 2 hold the lower / upper cell edges.
# Returns: a plain list of sfg POLYGON objects.
create_polygons_from_bounds <- function(x_bnds, y_bnds) {
  n_x <- dim(x_bnds)[2]
  n_y <- dim(y_bnds)[2]
  polys <- vector("list", n_x * n_y)
  k <- 0L
  for (i in seq_len(n_x)) {
    xlo <- x_bnds[1, i]
    xhi <- x_bnds[2, i]
    for (j in seq_len(n_y)) {
      ylo <- y_bnds[1, j]
      yhi <- y_bnds[2, j]
      # Closed ring: lower-left -> upper-left -> upper-right -> lower-right -> lower-left.
      ring <- matrix(
        c(xlo, ylo,
          xlo, yhi,
          xhi, yhi,
          xhi, ylo,
          xlo, ylo),
        byrow = TRUE, ncol = 2
      )
      k <- k + 1L
      polys[[k]] <- st_polygon(list(ring))
    }
  }
  polys
}
# Create the grid-cell polygons.
polygons <- create_polygons_from_bounds(x_bnds, y_bnds)
# Combine them into a simple feature column tagged with the British National
# Grid CRS (EPSG:27700).
sfc_polygons <- st_set_crs(st_sfc(polygons), 27700)
# Promote to an sf data frame; monthly tasmax columns are appended below.
sf_df <- st_sf(geometry = sfc_polygons)
# Directory holding the monthly tasmax NetCDF files (one file per year).
file_dir <- "data/tasmax_12km"
file_names <- list.files(file_dir, pattern = "\\.nc$", full.names = TRUE)
# Loop over each NetCDF file and add one "YYYY-MM" column per month to sf_df.
# Cell order must match the polygon order built above (x-major: y varies
# fastest within each x).
for (file_path in file_names) {
  # Open the NetCDF file
  nc <- nc_open(file_path)
  # Extract the year from the file name, e.g. ..._mon_195301-195312.nc -> "1953"
  year <- sub(".*/tasmax_hadukgrid_uk_12km_mon_(\\d{4}).*\\.nc$", "\\1", file_path)
  # Extract tasmax values for each month in the file
  for (month in 1:12) {
    # Column name of the form "YYYY-MM"
    column_name <- paste(year, sprintf("%02d", month), sep = "-")
    # Read one monthly [x, y] slice (count = -1 reads the full dimension)
    tasmax_values <- ncvar_get(nc, 'tasmax', start = c(1, 1, month), count = c(-1, -1, 1))
    # The polygons are ordered x-major (y fastest within each x), which is the
    # column-major vectorisation of the transposed matrix — equivalent to the
    # former element-by-element double loop, but vectorised.
    sf_df[[column_name]] <- as.vector(t(tasmax_values))
  }
  # Close the NetCDF file
  nc_close(nc)
}
# Remove grid cells with any NA month (cells outside the land/data mask).
sf_df_clean <- na.omit(sf_df)
# Aggregate grid values to the England boroughs.
# The GPKG geometry column is "geom"; rename to "geometry" for consistency with sf_df.
Eng_boroughs <- st_read(dsn="data/England_borough.gpkg") %>% rename("geometry" = "geom")
## Reading layer `england_borough_simple' from data source
##   `/Users/icarus/UCL/ucl_term2/CEGE0042/Assignment/coursework/data/England_borough.gpkg'
##   using driver `GPKG'
## Simple feature collection with 312 features and 15 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 131961 ymin: 11183.87 xmax: 655980.8 ymax: 657600.4
## Projected CRS: OSGB36 / British National Grid
# Keep only the borough name column (geometry is retained automatically by sf).
Eng_boroughs <- Eng_boroughs['NAME']
# Borough-level covariates (elevation, latitude, land-cover proportions), joined later by NAME.
Eng_features <- st_read(dsn="data/England_feature.gpkg") %>% rename("geometry" = "geom")
## Reading layer `England_feature' from data source
##   `/Users/icarus/UCL/ucl_term2/CEGE0042/Assignment/coursework/data/England_feature.gpkg'
##   using driver `GPKG'
## Simple feature collection with 312 features and 24 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 131961 ymin: 11183.87 xmax: 655980.8 ymax: 657600.4
## Projected CRS: OSGB36 / British National Grid
# Month columns to aggregate (everything except the geometry column).
columns_to_aggregate <- names(sf_df_clean)[which(names(sf_df_clean) != "geometry")]
# Spatially join grid cells to boroughs and average each month over the cells
# intersecting each borough.
sf_aggregated <- Eng_boroughs %>%
  st_join(sf_df_clean) %>%
  # Drop join rows where any month is NA so every borough series is complete.
  filter(!if_any(all_of(columns_to_aggregate), is.na)) %>%
  group_by(NAME, geometry) %>%
  summarize(
    # Use an anonymous function instead of passing na.rm through `...`,
    # which is deprecated in dplyr::across().
    across(all_of(columns_to_aggregate), \(x) mean(x, na.rm = TRUE)),
    .groups = 'drop'
  )
# Attach borough-level covariates by name.
sf_aggregated1 <- left_join(sf_aggregated,as.data.frame(Eng_features)[,c('NAME', 'Elvation', 'latitude','prop_manmade_area','prop_forest','prop_water')],by = "NAME")
# Drop NAME and geometry, keeping a boroughs x months numeric matrix.
sf_aggregated_matrix<-data.matrix(st_drop_geometry(sf_aggregated[,-1]))
rownames(sf_aggregated_matrix) <- st_drop_geometry(sf_aggregated)$NAME
t_sf_aggregated_matrix <- t(sf_aggregated_matrix) # STACF etc. needs time series in columns
# Data visualization: decile breaks computed over every borough-month value,
# so both monthly maps share one colour scale.
brks <- quantile(as.numeric(unlist(sf_aggregated_matrix)), seq(0, 1, 0.1))
# Maximum temperature, January 2022.
tm_shape(sf_aggregated) +
  tm_fill("2022-01", style = "fixed", palette = "-Spectral", breaks = brks,
          showNA = FALSE) +
  tm_compass(position = c("left", "top")) +
  tm_legend(position = c("left", "bottom")) +
  tm_scale_bar(breaks = c(0, 25, 50, 100, 200), text.size = 2)
# Maximum temperature, July 2022 (same breaks for direct comparison).
tm_shape(sf_aggregated) +
  tm_fill("2022-07", style = "fixed", palette = "-Spectral", breaks = brks,
          showNA = FALSE) +
  tm_compass(position = c("left", "top")) +
  tm_legend(position = c("left", "bottom")) +
  tm_scale_bar(breaks = c(0, 25, 50, 100, 200), text.size = 2)
# Examining non-spatio-temporal data characteristics: overall mean and sd.
mu2 <- mean(sf_aggregated_matrix)
mu2
## [1] 13.63693
sdev2 <- sd(sf_aggregated_matrix)
sdev2
## [1] 5.489434
# Distribution of all borough-month values, with the mean marked.
hist(sf_aggregated_matrix)
abline(v=mu2, col="red")
# Normality check via normal Q-Q plot.
qqnorm(sf_aggregated_matrix)
qqline(sf_aggregated_matrix, col="red")
# Covariates vs the long-run borough mean tasmax.
# ("Elvation" is the column's actual, misspelled name in the data.)
pairs(~Elvation+latitude+prop_forest+rowMeans(sf_aggregated_matrix),data=as.data.frame(sf_aggregated1),
      main="Simple Scatterplot Matrix",
      panel=function(x, y, ...) {
        points(x, y, cex=0.5, ...)
      }
)
# Examining temporal characteristics: England-wide monthly mean series.
plot(colMeans(sf_aggregated_matrix), xlab = "Month", ylab = "tasmax", type="l", xaxt="n")
# FIX: the series has 840 months (1953-2022); the previous axis stopped at
# month 720 (2013), leaving the last nine years unlabelled.
axis(1, at = seq(0, 840, 120), labels = seq(1953, 2023, 10))
# Heatmap of the full space-time matrix, rows ordered by prop_water.
sf_aggregated1 <- left_join(sf_aggregated,as.data.frame(Eng_features)[,c('NAME', 'Elvation', 'latitude','prop_manmade_area','prop_forest','prop_water')],by = "NAME")
sf_latitude_order <- sf_aggregated1 %>% arrange(prop_water)
# FIX: select the month columns by name; the hard-coded [,3:842] silently
# dropped the first month and pulled in the Elvation covariate column.
sf_latitude_ordermatrix <- data.matrix(
  st_drop_geometry(sf_latitude_order)[, columns_to_aggregate]
)
rownames(sf_latitude_ordermatrix) <- sf_latitude_order$NAME
# FIX: removed the stray y.scale.components.subticks(n=10) argument — it is a
# latticeExtra function (never loaded) and not accepted by stats::heatmap.
heatmap(sf_latitude_ordermatrix, Rowv=NA, Colv=NA, col=heat.colors(256),
        scale="none", margins=c(5,3), xlab="Month", cexCol=1.1)
# Examining spatial autocorrelation: queen-contiguity weights over boroughs.
# zero.policy allows boroughs with no neighbours (e.g. islands).
W <- nb2listw(poly2nb(sf_aggregated),zero.policy = TRUE)
# Two ways to test global Moran's I on the long-run borough means:
# analytical randomisation test, then a Monte-Carlo permutation test.
moran.test(x=rowMeans(sf_aggregated_matrix), listw=W)
## 
##  Moran I test under randomisation
## 
## data:  rowMeans(sf_aggregated_matrix)  
## weights: W    
## 
## Moran I statistic standard deviate = 25.002, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.920157307      -0.003246753       0.001364009
moran.mc(x=rowMeans(sf_aggregated_matrix), listw=W, nsim=9999)
## 
##  Monte-Carlo simulation of Moran I
## 
## data:  rowMeans(sf_aggregated_matrix) 
## weights: W  
## number of simulations + 1: 10000 
## 
## statistic = 0.92016, observed rank = 10000, p-value = 1e-04
## alternative hypothesis: greater
# Local Moran's I: where does the spatial autocorrelation occur?
Ii <- localmoran(x=rowMeans(sf_aggregated_matrix), listw=W)
# Geometry carrier for mapping.
# NOTE(review): sf_aggregated[,2] keeps the first month column plus geometry;
# presumably only the geometry matters here — verify.
eng_tasmax_sf <- sf_aggregated[,2]
eng_tasmax_sf$avg_tasmax <- as.data.frame(rowMeans(sf_aggregated_matrix))[,1]
eng_tasmax_sf$Ii <- Ii[,'Ii']
# Map the local Moran's I statistic.
tm_shape(eng_tasmax_sf) +
tm_borders(lwd = 0.1)+
tm_polygons(col="Ii", palette="-RdBu", style="quantile",size= 0)
# Unadjusted p-values: flag boroughs significant at the 5% level.
eng_tasmax_sf$Iip_unadjusted <- Ii[,"Pr(z != E(Ii))"]
eng_tasmax_sf$Ii_un_sig <- "nonsignificant"
eng_tasmax_sf$Ii_un_sig[which(eng_tasmax_sf$Iip_unadjusted < 0.05)] <- "significant"
# Statistically significant units with the unadjusted p-value (map disabled):
# tm_shape(eng_tasmax_sf) +
#   tm_borders(lwd = 0.1)+
#   tm_polygons(col="Ii_un_sig", palette="-RdBu")
# Bonferroni adjustment for multiple testing across the 308+ boroughs.
eng_tasmax_sf$Iip_adjusted <- p.adjust(eng_tasmax_sf$Iip_unadjusted, method="bonferroni")
eng_tasmax_sf$Ii_ad_sig <- "nonsignificant"
eng_tasmax_sf$Ii_ad_sig[which(eng_tasmax_sf$Iip_adjusted < 0.05)] <- "significant"
# tm_shape(eng_tasmax_sf) +
#   tm_borders(lwd = 0)+
#   tm_polygons(col="Ii_ad_sig", palette="-RdBu")
# Classify locations into local Moran's I clusters of high and low values.
# Code adapted from https://rpubs.com/Hailstone/346625
#
# shape: sf object; W: listw spatial weights; var: name of the column to test;
# alpha: significance threshold applied after adjustment; p.adjust.method:
# passed to p.adjust() (default Bonferroni).
# Returns list(results = data.frame(Ii, pvalue, type, sig)) where `type` is the
# HH/HL/LL/LH quadrant and `sig` masks non-significant units as "Non-sig".
moranCluster <- function(shape, W, var, alpha=0.05, p.adjust.method="bonferroni")
{
  Ii <- localmoran(shape[[var]], W)
  shape$Ii <- Ii[,"Ii"]
  # Adjust the two-sided local p-values for multiple testing.
  Iip <- p.adjust(Ii[,"Pr(z != E(Ii))"], method=p.adjust.method)
  shape$Iip <- Iip
  shape$sig <- shape$Iip < alpha
  # Scale the data to obtain low and high values
  shape$scaled <- scale(shape[[var]])            # high/low value at location i
  shape$lag_scaled <- lag.listw(W, shape$scaled) # high/low value at neighbours j
  # BUG FIX: the LH branch previously tested lag_scaled < 0 (duplicating the LL
  # condition), so low-value locations with high-value neighbours could never
  # be labelled "LH" and fell through to "Equivalent".
  shape$lag_cat <- factor(ifelse(shape$scaled > 0 & shape$lag_scaled > 0, "HH",
                          ifelse(shape$scaled > 0 & shape$lag_scaled < 0, "HL",
                          ifelse(shape$scaled < 0 & shape$lag_scaled < 0, "LL",
                          ifelse(shape$scaled < 0 & shape$lag_scaled > 0, "LH", "Equivalent")))))
  shape$sig_cluster <- as.character(shape$lag_cat)
  shape$sig_cluster[!shape$sig] <- "Non-sig"
  shape$sig_cluster <- as.factor(shape$sig_cluster)
  results <- data.frame(Ii=shape$Ii, pvalue=shape$Iip, type=shape$lag_cat, sig=shape$sig_cluster)
  return(list(results=results))
}
# Map the significant local Moran clusters of average tasmax.
clusters <- moranCluster(eng_tasmax_sf, W=W, var="avg_tasmax")$results
eng_tasmax_sf$Ii_cluster <- clusters$sig
tm_shape(eng_tasmax_sf) + tm_borders(lwd = 0)+tm_polygons(col="Ii_cluster")
# Decomposition for City of London time series.
# BUG FIX: start was c(1953-01, 1); "1953-01" is the subtraction 1953 - 1 =
# 1952, which dated the whole series a year early.
ts_london <- ts(t_sf_aggregated_matrix[,'City and County of the City of London'], start=c(1953, 1), frequency=12)
decom_london <- stats::decompose(ts_london)
autoplot(decom_london)
# Autocorrelation of the City of London max-temperature series, first 48 lags.
acf(sf_aggregated_matrix['City and County of the City of London',], lag.max=48, xlab="Lag", ylab="ACF", main="Autocorrelation plot of monthly maximum air temperatures")
pacf(sf_aggregated_matrix['City and County of the City of London',], lag.max=48, xlab="Lag", ylab="PACF",main="Partial Autocorrelation plot")
# Seasonal differencing (lag 12) to remove the annual cycle.
T.s.diff <- diff(sf_aggregated_matrix['City and County of the City of London',], lag=12, differences=1)
acf(T.s.diff, lag.max=48, xlab="Lag", ylab="ACF", main="Differenced Autocorrelation plot")
# q candidates:[1,2,3/1,2,3,4,5]
pacf(T.s.diff, lag.max=48, xlab="Lag", ylab="PACF",main="Differenced Partial Autocorrelation plot")
# p candidates:[1/1,2]
# Manual SARIMA order search on the first 780 months (training period),
# comparing AIC and in-sample NRMSE across candidate orders.
# ARIMA(1,0,3)(2,1,5)12 ----- initial combination of parameters
fit.ar <- arima(sf_aggregated_matrix['City and County of the City of London',1:780],order=c(1,0,3),seasonal=list(order=c(2,1,5),period=12))
fit.ar # aic = 2914.9
## 
## Call:
## arima(x = sf_aggregated_matrix["City and County of the City of London", 1:780], 
##     order = c(1, 0, 3), seasonal = list(order = c(2, 1, 5), period = 12))
## 
## Coefficients:
##          ar1      ma1      ma2      ma3    sar1     sar2     sma1    sma2
##       0.9989  -0.7475  -0.1611  -0.0637  0.9209  -0.8034  -1.9012  1.6673
## s.e.  0.0030   0.0363   0.0476   0.0368  0.1104   0.1626   0.1174  0.2457
##          sma3     sma4    sma5
##       -0.7369  -0.0875  0.0768
## s.e.   0.1794   0.0858  0.0419
## 
## sigma^2 estimated as 2.403:  log likelihood = -1445.45,  aic = 2914.9
# NRMSE comes from the sourced starima_package.R helpers.
NRMSE_fit <- NRMSE(res=fit.ar$residuals, obs=sf_aggregated_matrix['City and County of the City of London',1:780])
NRMSE_fit # 0.2700663
## [1] 0.2700663
# Several tries exploring different directions in the order space.
# ARIMA(1,0,3)(0,1,5)12
fit.ar <- arima(sf_aggregated_matrix['City and County of the City of London',1:780],order=c(1,0,3),seasonal=list(order=c(0,1,5),period=12))
fit.ar # aic = 2914.49
## 
## Call:
## arima(x = sf_aggregated_matrix["City and County of the City of London", 1:780], 
##     order = c(1, 0, 3), seasonal = list(order = c(0, 1, 5), period = 12))
## 
## Coefficients:
##          ar1      ma1      ma2      ma3     sma1     sma2    sma3     sma4
##       0.9987  -0.7483  -0.1609  -0.0633  -0.9733  -0.0425  0.0088  -0.0543
## s.e.  0.0031   0.0362   0.0475   0.0364   0.0390   0.0499  0.0503   0.0509
##         sma5
##       0.0868
## s.e.  0.0351
## 
## sigma^2 estimated as 2.421:  log likelihood = -1447.24,  aic = 2914.49
NRMSE_fit <- NRMSE(res=fit.ar$residuals, obs=sf_aggregated_matrix['City and County of the City of London',1:780])
NRMSE_fit # 0.2710715
## [1] 0.2710715
# ARIMA (1,0,4) (2,1,5)12
fit.ar <- arima(sf_aggregated_matrix['City and County of the City of London',1:780],order=c(1,0,4),seasonal=list(order=c(2,1,5),period=12))
fit.ar # aic = 2913.22
## 
## Call:
## arima(x = sf_aggregated_matrix["City and County of the City of London", 1:780], 
##     order = c(1, 0, 4), seasonal = list(order = c(2, 1, 5), period = 12))
## 
## Coefficients:
##          ar1      ma1      ma2      ma3      ma4    sar1     sar2     sma1
##       0.9988  -0.7510  -0.1346  -0.0232  -0.0639  1.5263  -0.8550  -2.5096
## s.e.  0.0030   0.0365   0.0460   0.0439   0.0362  0.0888   0.0849   0.0999
##         sma2     sma3    sma4     sma5
##       2.3163  -0.8060  0.0313  -0.0262
## s.e.  0.1892   0.1488  0.1107   0.0448
## 
## sigma^2 estimated as 2.387:  log likelihood = -1443.61,  aic = 2913.22
NRMSE_fit <- NRMSE(res=fit.ar$residuals, obs=sf_aggregated_matrix['City and County of the City of London',1:780])
NRMSE_fit # 0.2691755
## [1] 0.2691755
# ARIMA(1,0,3)(2,1,6)12 ---- the selected combination (lowest AIC and NRMSE)
fit.ar <- arima(sf_aggregated_matrix['City and County of the City of London',1:780],order=c(1,0,3),seasonal=list(order=c(2,1,6),period=12))
fit.ar # aic = 2910.26
## 
## Call:
## arima(x = sf_aggregated_matrix["City and County of the City of London", 1:780], 
##     order = c(1, 0, 3), seasonal = list(order = c(2, 1, 6), period = 12))
## 
## Coefficients:
##          ar1      ma1      ma2      ma3     sar1     sar2   sma1     sma2
##       0.9992  -0.7507  -0.1619  -0.0603  -1.7140  -0.7717  0.756  -0.9537
## s.e.  0.0028   0.0362   0.0477   0.0369   0.1017   0.1027  0.111   0.0592
##          sma3     sma4    sma5    sma6
##       -0.8414  -0.0597  0.0482  0.1161
## s.e.   0.1275   0.0584  0.0512  0.0432
## 
## sigma^2 estimated as 2.363:  log likelihood = -1442.13,  aic = 2910.26
NRMSE_fit <- NRMSE(res=fit.ar$residuals, obs=sf_aggregated_matrix['City and County of the City of London',1:780])
NRMSE_fit # 0.2678122
## [1] 0.2678122
# # ADDITIONAL TRY-----------
# # ARI(1,0,0)(2,1,0)12
# fit.ar <- arima(sf_aggregated_matrix['City and County of the City of London',1:780],order=c(1,0,0),seasonal=list(order=c(2,1,0),period=12))
# fit.ar # aic = 3099.05
# NRMSE_fit <- NRMSE(res=fit.ar$residuals, obs=sf_aggregated_matrix['City and County of the City of London',1:780])
# NRMSE_fit # 0.3142105
# 
# # IMA(0,0,3)(0,1,5)12
# fit.ar <- arima(sf_aggregated_matrix['City and County of the City of London',1:780],order=c(0,0,3),seasonal=list(order=c(0,1,5),period=12))
# fit.ar # aic = 2926.33
# NRMSE_fit <- NRMSE(res=fit.ar$residuals, obs=sf_aggregated_matrix['City and County of the City of London',1:780])
# NRMSE_fit # 0.2756825
# 
# # ARIMA(1,0,4)(2,1,1)12
# fit.ar <- arima(sf_aggregated_matrix['City and County of the City of London',1:780],order=c(1,0,4),seasonal=list(order=c(2,1,1),period=12))
# fit.ar # aic = 2916.47
# NRMSE_fit <- NRMSE(res=fit.ar$residuals, obs=sf_aggregated_matrix['City and County of the City of London',1:780])
# NRMSE_fit # 0.2701743
# 
# # ARIMA(1,0,4)(4,1,1)12
# fit.ar <- arima(sf_aggregated_matrix['City and County of the City of London',1:780],order=c(1,0,4),seasonal=list(order=c(4,1,1),period=12))
# fit.ar # aic = 2913
# NRMSE_fit <- NRMSE(res=fit.ar$residuals, obs=sf_aggregated_matrix['City and County of the City of London',1:780])
# NRMSE_fit # 0.2703979
# Residual diagnostics (standardised residuals, ACF, Ljung-Box) for the
# selected ARIMA(1,0,3)(2,1,6)12 fit.
tsdiag(fit.ar)
# Refit with forecast::Arima, then apply the fitted model to the 60-month
# hold-out (months 781-840) to get one-step-ahead predictions.
fit.Ar <- Arima(sf_aggregated_matrix['City and County of the City of London',1:780],order=c(1,0,3),seasonal=list(order=c(2,1,6),period=12))
pre.Ar <- Arima(sf_aggregated_matrix['City and County of the City of London',781:840], model=fit.Ar)
# Overlay fitted vs observed hold-out values.
matplot(cbind(pre.Ar$fitted, pre.Ar$x), type="l")
NRMSE_fit1 <- NRMSE(res=pre.Ar$residuals, obs=sf_aggregated_matrix['City and County of the City of London',781:840])
NRMSE_fit1 # 0.2250566
## [1] 0.2250566
# Fitting the model using auto.arima (baseline comparison; performs worse
# than the manually selected SARIMA above).
fit.auto.ar <- auto.arima(sf_aggregated_matrix['City and County of the City of London',1:780])
# fit.auto.ar # aic = 3244.75
NRMSE_fit2 <- NRMSE(res=fit.auto.ar$residuals, obs=sf_aggregated_matrix['City and County of the City of London',1:780])
NRMSE_fit2 # 0.3356572
## [1] 0.3356572
# Apply the auto.arima order (5,0,1) to the hold-out months 781-840.
fit.auto.Ar <- Arima(sf_aggregated_matrix['City and County of the City of London',1:780],order=c(5,0,1))
pre.auto.Ar <- Arima(sf_aggregated_matrix['City and County of the City of London',781:840], model=fit.auto.Ar)
NRMSE_fit22 <- NRMSE(res=pre.auto.Ar$residuals, obs=sf_aggregated_matrix['City and County of the City of London',781:840])
NRMSE_fit22 # 0.3643618
## [1] 0.3643618
matplot(cbind(pre.auto.Ar$fitted, pre.auto.Ar$x), type="l")
# Spatial weight matrices for the space-time models:
# W1 = first-order contiguity, W2 = second-order (via nblag), W0 = identity.
nbrs <- poly2nb(sf_aggregated)
W1 <- nb2mat(nbrs)
Wlist <- nblag(nbrs, 3)
W2 <- nb2mat(Wlist[[2]])
# NOTE(review): W0 and W2 are only used in commented-out exploration below.
W0 <- diag(x=1, nrow(W1), ncol(W1))
# Seasonally difference all borough series at once (lag 12).
t_sf_aggregated_matrix.diff <- diff(t_sf_aggregated_matrix,lag=12,differences= 1)
# stacf(t_sf_aggregated_matrix.diff, W0, 120)
# Space-time ACF of the raw series (first-order spatial neighbours):
# the echoed output below shows a strong 12-month cycle that barely decays,
# confirming that seasonal differencing is needed.
stacf(t_sf_aggregated_matrix, W1, 60)
## $stacf
## [,1]
## [1,] 0.9999290777
## [2,] 0.8169726479
## [3,] 0.4687174737
## [4,] 0.0110568493
## [5,] -0.4446868245
## [6,] -0.7686059935
## [7,] -0.8837675556
## [8,] -0.7643580206
## [9,] -0.4393905430
## [10,] 0.0112265452
## [11,] 0.4646421308
## [12,] 0.8004143850
## [13,] 0.9240301916
## [14,] 0.7997389802
## [15,] 0.4565081518
## [16,] -0.0022004818
## [17,] -0.4472335567
## [18,] -0.7702766309
## [19,] -0.8882387225
## [20,] -0.7654490940
## [21,] -0.4455266179
## [22,] 0.0063584292
## [23,] 0.4615246107
## [24,] 0.7965523704
## [25,] 0.9211665281
## [26,] 0.7996993195
## [27,] 0.4598058376
## [28,] 0.0085338856
## [29,] -0.4377617504
## [30,] -0.7656404490
## [31,] -0.8851801011
## [32,] -0.7666456501
## [33,] -0.4448661219
## [34,] 0.0078345007
## [35,] 0.4640311227
## [36,] 0.7966409156
## [37,] 0.9182925546
## [38,] 0.7905333223
## [39,] 0.4544180378
## [40,] 0.0018943912
## [41,] -0.4517015912
## [42,] -0.7750635707
## [43,] -0.8942210552
## [44,] -0.7729210599
## [45,] -0.4364447751
## [46,] 0.0097535012
## [47,] 0.4628237817
## [48,] 0.7998780818
## [49,] 0.9158395831
## [50,] 0.7932724486
## [51,] 0.4549562664
## [52,] -0.0005737165
## [53,] -0.4510442644
## [54,] -0.7797966651
## [55,] -0.8937585343
## [56,] -0.7681247032
## [57,] -0.4379995833
## [58,] 0.0176411661
## [59,] 0.4720303111
## [60,] 0.8002646188
## [61,] 0.9221265476
##
## $ubound
## [1] 0.004073835
##
## $lbound
## [1] -0.004073835
# Space-time ACF of the seasonally differenced series: the annual cycle is
# gone; the dominant remaining feature is the negative spike at lag 13.
stacf(t_sf_aggregated_matrix.diff, W1, 60)
## $stacf
## [,1]
## [1,] 9.995814e-01
## [2,] 2.286773e-01
## [3,] 1.119496e-01
## [4,] 8.697510e-02
## [5,] -5.807124e-03
## [6,] -2.342842e-02
## [7,] 2.099066e-02
## [8,] 2.585720e-02
## [9,] 5.370462e-02
## [10,] 2.308844e-02
## [11,] -9.384988e-03
## [12,] -8.033242e-02
## [13,] -4.733424e-01
## [14,] -1.058099e-01
## [15,] -8.687575e-02
## [16,] -1.453961e-01
## [17,] -7.815240e-02
## [18,] -2.980636e-02
## [19,] -3.802736e-02
## [20,] 1.169057e-02
## [21,] -2.333079e-02
## [22,] -1.986062e-02
## [23,] -3.961391e-02
## [24,] -4.674311e-02
## [25,] -2.194464e-02
## [26,] 3.822934e-02
## [27,] 2.030813e-02
## [28,] 9.281782e-02
## [29,] 1.369636e-01
## [30,] 8.868310e-02
## [31,] 8.727283e-02
## [32,] 3.883168e-02
## [33,] -6.075257e-02
## [34,] -1.692273e-02
## [35,] 2.884737e-02
## [36,] 2.665530e-03
## [37,] 9.723611e-03
## [38,] -6.362536e-02
## [39,] -1.363086e-03
## [40,] 1.385593e-05
## [41,] -8.311346e-02
## [42,] -5.186458e-02
## [43,] -9.719636e-02
## [44,] -1.077519e-01
## [45,] 4.766322e-02
## [46,] -5.899173e-02
## [47,] -9.184349e-02
## [48,] -7.325033e-03
## [49,] -6.413842e-02
## [50,] 1.849988e-02
## [51,] 6.412118e-03
## [52,] -1.873746e-02
## [53,] 5.151089e-02
## [54,] -9.082154e-03
## [55,] 1.483744e-02
## [56,] 4.546322e-02
## [57,] 1.291029e-02
## [58,] 1.069642e-01
## [59,] 1.298391e-01
## [60,] 3.126883e-02
## [61,] 5.445497e-02
##
## $ubound
## [1] 0.004105539
##
## $lbound
## [1] -0.004105539
# Space-time PACF of the raw series (first-order spatial neighbours).
stpacf(t_sf_aggregated_matrix, W1, 60)
## $stpacf
## [,1]
## [1,] -3.849070e-04
## [2,] -2.136039e-03
## [3,] 1.261786e-03
## [4,] 2.212410e-03
## [5,] 2.825496e-03
## [6,] 1.971271e-03
## [7,] 8.584163e-04
## [8,] 7.076142e-04
## [9,] 1.564163e-03
## [10,] 9.049861e-04
## [11,] -2.775790e-04
## [12,] -1.045866e-03
## [13,] 1.549707e-03
## [14,] -6.536937e-05
## [15,] -2.384886e-04
## [16,] 4.234760e-04
## [17,] 5.853537e-04
## [18,] -2.045278e-04
## [19,] 2.516246e-04
## [20,] -8.373397e-04
## [21,] 1.302730e-03
## [22,] 6.291935e-04
## [23,] 3.210108e-04
## [24,] -1.387848e-03
## [25,] 1.873651e-03
## [26,] 2.818730e-04
## [27,] 1.430196e-03
## [28,] 9.187508e-06
## [29,] 2.755650e-04
## [30,] -2.543481e-04
## [31,] 2.687892e-04
## [32,] -8.208603e-04
## [33,] 1.503462e-03
## [34,] 8.018407e-04
## [35,] -5.580112e-04
## [36,] -1.732145e-03
## [37,] 4.459371e-04
## [38,] 1.643785e-04
## [39,] 3.661412e-04
## [40,] -2.239342e-03
## [41,] -4.724246e-05
## [42,] 8.545967e-05
## [43,] 7.603769e-04
## [44,] 7.329521e-04
## [45,] 5.910613e-04
## [46,] 1.935461e-04
## [47,] 5.586757e-04
## [48,] -1.521238e-03
## [49,] 1.715927e-03
## [50,] -1.150768e-04
## [51,] 5.161281e-04
## [52,] -9.078895e-04
## [53,] 2.505487e-04
## [54,] -4.533890e-04
## [55,] 9.376680e-04
## [56,] -3.774466e-04
## [57,] 1.089926e-03
## [58,] 8.336141e-04
## [59,] 1.279946e-03
## [60,] -1.470980e-03
##
## $ubound
## [1] 0.06904767
##
## $lbound
## [1] -0.06904767
# Space-time PACF of the seasonally differenced series.
stpacf(t_sf_aggregated_matrix.diff,W1,60)
## $stpacf
## [,1]
## [1,] -3.091437e-04
## [2,] 1.281623e-04
## [3,] 9.132655e-05
## [4,] -1.091403e-03
## [5,] -7.104038e-04
## [6,] 7.392717e-04
## [7,] -1.452191e-04
## [8,] -4.702894e-06
## [9,] -2.786141e-04
## [10,] 5.588406e-05
## [11,] -1.136630e-03
## [12,] 2.394945e-04
## [13,] -2.236226e-05
## [14,] -4.846903e-04
## [15,] -1.624324e-03
## [16,] -6.809392e-04
## [17,] -2.758485e-04
## [18,] 4.754280e-04
## [19,] 4.731520e-05
## [20,] -4.663121e-04
## [21,] -4.824278e-05
## [22,] -3.629338e-04
## [23,] -2.442398e-04
## [24,] 3.982024e-04
## [25,] 1.175675e-03
## [26,] -4.400386e-04
## [27,] -3.114557e-04
## [28,] 1.291379e-03
## [29,] 2.866187e-04
## [30,] 6.791798e-04
## [31,] -3.618677e-04
## [32,] -1.887685e-03
## [33,] 2.988513e-04
## [34,] 1.821066e-04
## [35,] -9.750323e-04
## [36,] 2.871136e-04
## [37,] -4.238351e-04
## [38,] -1.533457e-04
## [39,] 2.148763e-05
## [40,] -1.741711e-04
## [41,] -8.865834e-05
## [42,] 6.955403e-04
## [43,] -4.321576e-04
## [44,] -3.573184e-04
## [45,] -2.798004e-04
## [46,] -6.596482e-04
## [47,] -1.322149e-03
## [48,] -5.775746e-05
## [49,] 8.162509e-04
## [50,] -2.887874e-05
## [51,] 1.016208e-03
## [52,] 6.106386e-04
## [53,] -7.002192e-04
## [54,] 1.798726e-04
## [55,] -4.899561e-04
## [56,] 4.153639e-04
## [57,] 2.340743e-04
## [58,] -1.417819e-03
## [59,] -9.156211e-04
## [60,] -5.408306e-04
##
## $ubound
## [1] 0.06954681
##
## $lbound
## [1] -0.06954681
W_fit<-list(w1=W1) # Create a list of spatial weight matrices, zero not needed
# W_fit$w2 <- W2
# STARIMA order search on the training period (months 1-780), seasonal
# differencing d = 12; compare in-sample NRMSE at borough 56.
fit.star101 <- starima_fit(Z=t_sf_aggregated_matrix[1:780,],W=W_fit,p=1,d=12,q=1)
fit.star102 <- starima_fit(Z=t_sf_aggregated_matrix[1:780,],W=W_fit,p=1,d=12,q=2)
fit.star103 <- starima_fit(Z=t_sf_aggregated_matrix[1:780,],W=W_fit,p=1,d=12,q=3)
fit.star001 <- starima_fit(Z=t_sf_aggregated_matrix[1:780,],W=W_fit,p=0,d=12,q=1)
fit.star100 <- starima_fit(Z=t_sf_aggregated_matrix[1:780,],W=W_fit,p=1,d=12,q=0)
fit.star101$NRMSE[56] # 0.3812036
## [1] 0.3812036
fit.star102$NRMSE[56] # 0.3814073
## [1] 0.3814073
fit.star103$NRMSE[56] # 0.3814507
## [1] 0.3814507
fit.star001$NRMSE[56] # 0.3912485
## [1] 0.3912485
fit.star100$NRMSE[56] # 0.3806741 the selected one
## [1] 0.3806741
fit.star100$NRMSE[56]
## [1] 0.3806741
# Space-time ACF of the residuals of the selected STARIMA(1,12,0) model.
stacf(fit.star100$RES,W1,48)
## $stacf
## [,1]
## [1,] 0.999557362
## [2,] -0.017462444
## [3,] 0.063561127
## [4,] 0.059336483
## [5,] -0.022228503
## [6,] -0.023005605
## [7,] 0.018907577
## [8,] -0.004452851
## [9,] 0.060709005
## [10,] 0.030220758
## [11,] -0.004711100
## [12,] 0.049413949
## [13,] -0.470303979
## [14,] 0.004562637
## [15,] -0.054697701
## [16,] -0.129042109
## [17,] -0.055298692
## [18,] -0.004984029
## [19,] -0.034223244
## [20,] 0.046242887
## [21,] -0.039347854
## [22,] -0.025301238
## [23,] -0.027501872
## [24,] -0.058370832
## [25,] -0.033533346
## [26,] 0.063029396
## [27,] 0.010009803
## [28,] 0.087542898
## [29,] 0.120555072
## [30,] 0.043103752
## [31,] 0.069810373
## [32,] 0.019673195
## [33,] -0.058716917
## [34,] 0.002415902
## [35,] 0.034191559
## [36,] -0.007401483
## [37,] 0.023994561
## [38,] -0.084401384
## [39,] -0.004146078
## [40,] 0.017040948
## [41,] -0.080377359
## [42,] -0.009917727
## [43,] -0.073756230
## [44,] -0.110362638
## [45,] 0.069381836
## [46,] -0.055597180
## [47,] -0.079457702
## [48,] 0.041195421
## [49,] -0.066533418
##
## $ubound
## [1] 0.00424313
##
## $lbound
## [1] -0.00424313
# Residual distribution for the selected STARIMA model at borough 56.
hist(fit.star100$RES[,56])
# Ljung-Box test at lag 1: p = 0.7233, so no evidence of remaining
# autocorrelation in the residuals.
Box.test(fit.star100$RES[,56],lag=1, type="Ljung") # p= 0.7233
##
## Box-Ljung test
##
## data: fit.star100$RES[, 56]
## X-squared = 0.12535, df = 1, p-value = 0.7233
# Forecast the 60-month hold-out (months 781-840) with the selected STARIMA
# model; the input starts 13 rows earlier to supply the d=12 seasonal lag
# plus the p=1 autoregressive lag.
pre.star <- starima_pre(t_sf_aggregated_matrix[(780-12-1+1):840,],model=fit.star100)
# Observed vs predicted hold-out series for borough 56.
matplot(1:60, cbind(t_sf_aggregated_matrix[781:840,56], pre.star$PRE[,56]), type="l")
# Show how the forecast accuracy of the STARIMA model varies across the
# study area: per-borough prediction error for July 2021.
df <- as.data.frame(pre.star$PRE['2021-07',])
df$OBS <- as.vector(pre.star$OBS['2021-07',])
colnames(df)[1:2] <- c("PRE", "OBS")
# Prediction minus observation (base assignment in place of mutate()).
df$differ <- df$PRE - df$OBS
# Row names carry the borough names; expose them as a join key.
df$NAME <- rownames(df)
# Join the errors onto the borough geometries and map the differences.
sf_aggregated2 <- left_join(sf_aggregated1[, 1], df, by = "NAME")
ggplot(sf_aggregated2) +
  geom_sf(aes(fill = differ)) +
  scale_fill_viridis_c() +
  theme_minimal() +
  labs(fill = "Difference")